Project 2
Extract 10000 tweets from Twitter using twitteR package including retweets.
Subset the retweets and the original tweets into a separate file
Plot the retweets and the original tweets using bar graph in vertical manner.
Include legends
Extract 10000 tweets from Twitter using twitteR package including retweets.
library(wordcloud)
## Loading required package: RColorBrewer
library(plotly)
## Loading required package: ggplot2
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(tm)
## Loading required package: NLP
##
## Attaching package: 'NLP'
## The following object is masked from 'package:ggplot2':
##
## annotate
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(RColorBrewer)
library(ggplot2)
library(twitteR)
##
## Attaching package: 'twitteR'
## The following objects are masked from 'package:dplyr':
##
## id, location
SET-UP CREDENTIALS
# Read the Twitter API credentials from environment variables (for example,
# defined in ~/.Renviron) instead of embedding them in the document.
# NOTE(review): the key/secret values that used to be hard-coded here were
# published with this file and should be revoked and regenerated.
CONSUMER_SECRET <- Sys.getenv("TWITTER_CONSUMER_SECRET")
CONSUMER_KEY <- Sys.getenv("TWITTER_CONSUMER_KEY")
ACCESS_SECRET <- Sys.getenv("TWITTER_ACCESS_SECRET")
ACCESS_TOKEN <- Sys.getenv("TWITTER_ACCESS_TOKEN")

# Sys.getenv() returns "" for an unset variable; fail early with a clear
# message rather than letting the authentication call fail later.
twitter_credentials <- c(
  TWITTER_CONSUMER_SECRET = CONSUMER_SECRET,
  TWITTER_CONSUMER_KEY = CONSUMER_KEY,
  TWITTER_ACCESS_SECRET = ACCESS_SECRET,
  TWITTER_ACCESS_TOKEN = ACCESS_TOKEN
)
if (!all(nzchar(twitter_credentials))) {
  stop(
    "Missing Twitter credentials; set these environment variables: ",
    paste(names(twitter_credentials)[!nzchar(twitter_credentials)],
          collapse = ", "),
    call. = FALSE
  )
}
CONNECT TO TWITTER APP
# Authenticate this R session against the Twitter API using the four
# credential values assigned above (all arguments are passed by name).
setup_twitter_oauth(
  consumer_key = CONSUMER_KEY,
  consumer_secret = CONSUMER_SECRET,
  access_token = ACCESS_TOKEN,
  access_secret = ACCESS_SECRET
)
## [1] "Using direct authentication"
Get 10000 observations, including retweets.
# Search for up to 10000 English-language tweets matching "Philippines"
# within the requested date window. retryOnRateLimit = 120 lets the call
# wait and retry when the API reports rate limiting.
trendTweets <- searchTwitter(
  "Philippines",
  n = 10000,
  lang = "en",
  since = "2022-11-24",
  until = "2022-12-30",
  retryOnRateLimit = 120
)
## [1] "Rate limited .... blocking for a minute and retrying up to 119 times ..."
## [1] "Rate limited .... blocking for a minute and retrying up to 118 times ..."
## [1] "Rate limited .... blocking for a minute and retrying up to 117 times ..."
## [1] "Rate limited .... blocking for a minute and retrying up to 116 times ..."
## [1] "Rate limited .... blocking for a minute and retrying up to 115 times ..."
## [1] "Rate limited .... blocking for a minute and retrying up to 114 times ..."
## [1] "Rate limited .... blocking for a minute and retrying up to 113 times ..."
Convert the Twitter list to a data frame
# Convert the list of tweets returned by searchTwitter() into a data frame.
philippinesDF <- twListToDF(trendTweets)
Save and Load Data frame files
# Persist the raw tweet data frame to disk, then read it back so the rest of
# the analysis works from the saved copy.
save(list = "philippinesDF", file = "philippinesDF.Rdata")
load("philippinesDF.Rdata")
Checking for missing values in data frame
# Count the missing values in every column of the tweet data frame.
# vapply() guarantees a named integer vector even when the data frame has no
# columns, where sapply() would silently return an empty list instead.
# The variable name is kept as-is in case later code refers to it.
saple_data <- vapply(
  philippinesDF,
  function(x) sum(is.na(x)),
  integer(1)
)
Tweets
Subsetting using the dplyr package.
# Keep only the original tweets (isRetweet is FALSE) together with the
# columns needed for the time-based plots.
philippines_tweets <- philippinesDF %>%
  select(screenName, text, created, isRetweet) %>%
  filter(isRetweet == FALSE)

# Save and reload the subset. The file name must be identical in both calls:
# previously the data was written to "philippines_tweetsF.Rdata" but read
# from "philippines_tweets.Rdata", so load() failed with
# "cannot open the connection".
save(philippines_tweets, file = "philippines_tweets.Rdata")
load(file = "philippines_tweets.Rdata")
## Warning in readChar(con, 5L, useBytes = TRUE): cannot open compressed file
## 'philippines_tweets.Rdata', probable reason 'No such file or directory'
## Error in readChar(con, 5L, useBytes = TRUE): cannot open the connection
Grouping the data created.
# Print the latest and earliest creation timestamps of the original tweets.
# group_by(1) groups on a constant, so the result is a single-row tibble
# with a grouping column named `1`.
summarise(
  group_by(philippines_tweets, 1),
  max = max(created),
  min = min(created)
)
## # A tibble: 1 × 3
## `1` max min
## <dbl> <dttm> <dttm>
## 1 1 2022-12-11 12:55:53 2022-12-11 06:20:02
# Add a column with each tweet's creation time rounded to the hour;
# as.POSIXct() converts the rounded value back to POSIXct.
philippines_data <- mutate(
  philippines_tweets,
  Created_At_Round = as.POSIXct(round(created, units = "hours"))
)

# Earliest and latest creation timestamps among the original tweets.
mn <- min(pull(philippines_tweets, created))
mx <- max(pull(philippines_tweets, created))
Plot tweets using the library(plotly) and ggplot().
# Histogram of original tweets over time, with bars coloured by their count
# and the colour legend placed on the right.
# after_stat(count) replaces the `..count..` notation deprecated in
# ggplot2 3.4.0. bins = 30 spells out the value stat_bin() was already
# choosing by default, which keeps the plot the same and silences the
# "Pick better value" message.
philData_plotting <- ggplot(philippines_data, aes(x = Created_At_Round)) +
  geom_histogram(aes(fill = after_stat(count)), bins = 30) +
  theme(legend.position = "right") +
  xlab("Time") +
  ylab("Number of tweets") +
  scale_fill_gradient(low = "midnightblue", high = "aquamarine4")

# Render the ggplot object as an interactive plotly chart.
philData_plotting %>% ggplotly()
## Warning: The dot-dot notation (`..count..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(count)` instead.
## ℹ The deprecated feature was likely used in the ggplot2 package.
## Please report the issue at <https://github.com/tidyverse/ggplot2/issues>.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Retweets
# Keep only the retweets (isRetweet is TRUE) together with the columns
# needed for the time-based plots.
retweet_columns <- select(philippinesDF, screenName, text, created, isRetweet)
philippines_tweets2 <- filter(retweet_columns, isRetweet == TRUE)
rm(retweet_columns)
Grouping the data created
# Print the latest and earliest creation timestamps of the retweets.
# group_by(1) groups on a constant, so the result is a single-row tibble
# with a grouping column named `1`.
summarise(
  group_by(philippines_tweets2, 1),
  max = max(created),
  min = min(created)
)
## # A tibble: 1 × 3
## `1` max min
## <dbl> <dttm> <dttm>
## 1 1 2022-12-11 12:55:54 2022-12-11 06:19:34
# Add a column with each retweet's creation time rounded to the hour;
# as.POSIXct() converts the rounded value back to POSIXct.
philippines_data2 <- mutate(
  philippines_tweets2,
  Created_At_Round = as.POSIXct(round(created, units = "hours"))
)

# Earliest and latest creation timestamps among the retweets. These
# assignments overwrite any existing mn and mx.
mn <- min(pull(philippines_tweets2, created))
mx <- max(pull(philippines_tweets2, created))
Plot retweets using the library(plotly) and ggplot().
# Histogram of retweets over time, with bars coloured by their count and the
# colour legend placed on the right.
# after_stat(count) replaces the `..count..` notation deprecated in
# ggplot2 3.4.0. bins = 30 spells out the value stat_bin() was already
# choosing by default, which keeps the plot the same and silences the
# "Pick better value" message.
philData_plotting2 <- ggplot(philippines_data2, aes(x = Created_At_Round)) +
  geom_histogram(aes(fill = after_stat(count)), bins = 30) +
  theme(legend.position = "right") +
  xlab("Time") +
  ylab("Number of tweets") +
  scale_fill_gradient(low = "midnightblue", high = "aquamarine4")

# Render the ggplot object as an interactive plotly chart.
philData_plotting2 %>% ggplotly()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.